core: Add API (and standard concept for) content checksum
author Colin Walters <walters@verbum.org>
Thu, 8 Feb 2018 21:33:18 +0000 (16:33 -0500)
committer Atomic Bot <atomic-devel@projectatomic.io>
Mon, 12 Feb 2018 19:03:18 +0000 (19:03 +0000)
There are a few cases for knowing whether a commit has identical
content to another commit.  Some people want to do a "promotion workflow",
where the content of a commit on a testing branch is then "promoted"
to a production branch with `ostree commit --tree=ref`.

Another use case I just hit in rpm-ostree deals with
[jigdo](https://github.com/projectatomic/rpm-ostree/issues/1081) where we're
importing RPMs on both the client and server, and will be using the
content checksum, since the client/server cases inject different metadata
into the commit object.

Closes: https://github.com/ostreedev/ostree/issues/1315
Closes: #1449
Approved by: jlebon

apidoc/ostree-sections.txt
src/libostree/libostree-devel.sym
src/libostree/ostree-core.c
src/libostree/ostree-core.h
src/ostree/ot-dump.c
tests/basic-test.sh

index d3cf1c68f788f04ba089ab1496f32678b117d78d..4421ed10d61e82ea01822b7d38cb74b6454a7dc7 100644 (file)
@@ -149,6 +149,7 @@ ostree_validate_structureof_dirtree
 ostree_validate_structureof_dirmeta
 ostree_commit_get_parent
 ostree_commit_get_timestamp
+ostree_commit_get_content_checksum
 ostree_check_version
 </SECTION>
 
index cdd6b7ca162d3e8aa80b08eeeaab022fea9ea3cb..468fbc500261208a139a981a54651fa6d268976e 100644 (file)
@@ -19,6 +19,7 @@
 
 /* Add new symbols here.  Release commits should copy this section into -released.sym. */
 LIBOSTREE_2018.2 {
+  ostree_commit_get_content_checksum;
 } LIBOSTREE_2018.1;
 
 /* Stub section for the stable release *after* this development one; don't
index f35714ce86c8b40c2e9cc587c70303ebf9df0a2a..ba790dc7299f9001f74c9a1257f2ef3e016b74e4 100644 (file)
@@ -2371,6 +2371,51 @@ ostree_commit_get_timestamp (GVariant  *commit_variant)
   return GUINT64_FROM_BE (ret);
 }
 
+
+/**
+ * ostree_commit_get_content_checksum:
+ * @commit_variant: A commit object
+ *
+ * There are use cases where one wants a checksum just of the content of a
+ * commit. OSTree commits by default capture the current timestamp, and may have
+ * additional metadata, which means that re-committing identical content
+ * often results in a new checksum.
+ *
+ * By comparing checksums of content, it's possible to easily distinguish
+ * cases where nothing actually changed.
+ *
+ * The content checksum is simply defined as `SHA256(root_dirtree_checksum || root_dirmeta_checksum)`,
+ * i.e. the SHA-256 of the root "dirtree" object's checksum concatenated with the
+ * root "dirmeta" checksum (both in binary form, not hexadecimal).
+ *
+ * Returns: (nullable): A newly allocated SHA-256 hex string (free with g_free()), or %NULL if @commit_variant is not well-formed
+ */
+gchar *
+ostree_commit_get_content_checksum (GVariant *commit_variant)
+{
+  g_auto(OtChecksum) checksum = { 0, };
+  ot_checksum_init (&checksum);
+
+  g_autoptr(GVariant) tree_contents_csum = NULL;
+  g_autoptr(GVariant) tree_meta_csum = NULL;
+
+  g_variant_get_child (commit_variant, 6, "@ay", &tree_contents_csum); /* root dirtree checksum */
+  g_variant_get_child (commit_variant, 7, "@ay", &tree_meta_csum); /* root dirmeta checksum */
+
+  const guchar *bytes;
+  bytes = ostree_checksum_bytes_peek_validate (tree_contents_csum, NULL);
+  if (!bytes)
+    return NULL; /* validation failed; no error detail is requested (NULL error arg) */
+  ot_checksum_update (&checksum, bytes, OSTREE_SHA256_DIGEST_LEN); /* dirtree first: order is part of the definition */
+  bytes = ostree_checksum_bytes_peek_validate (tree_meta_csum, NULL);
+  if (!bytes)
+    return NULL; /* validation failed for the dirmeta checksum */
+  ot_checksum_update (&checksum, bytes, OSTREE_SHA256_DIGEST_LEN);
+  char hexdigest[OSTREE_SHA256_STRING_LEN+1]; /* +1 for the trailing NUL */
+  ot_checksum_get_hexdigest (&checksum, hexdigest, sizeof (hexdigest));
+  return g_strdup (hexdigest); /* heap copy of the stack buffer; caller owns it */
+}
+
 /* Used in pull/deploy to validate we're not being downgraded */
 gboolean
 _ostree_compare_timestamps (const char   *current_rev,
index d38175f3fa1623a95bddf04e9c2a8d140d52e5d8..018f5070dd209d4b81ce2c53dab9c62a98b1f5f6 100644 (file)
@@ -520,6 +520,9 @@ gchar *  ostree_commit_get_parent            (GVariant  *commit_variant);
 _OSTREE_PUBLIC
 guint64  ostree_commit_get_timestamp         (GVariant  *commit_variant);
 
+_OSTREE_PUBLIC
+gchar *  ostree_commit_get_content_checksum  (GVariant  *commit_variant);
+
 _OSTREE_PUBLIC
 gboolean ostree_check_version (guint required_year, guint required_release);
 
index 6ef7bbf7e39e892d11ea4c19ba3f8dd45151910f..1ef63740eaa35cdab7dfdd6a7d9f422b3d6a4266 100644 (file)
@@ -126,6 +126,8 @@ dump_commit (GVariant            *variant,
   str = format_timestamp (timestamp, &local_error);
   if (!str)
     errx (1, "Failed to read commit: %s", local_error->message);
+  g_autofree char *contents = ostree_commit_get_content_checksum (variant); /* owned; NULL if the commit is malformed */
+  g_print ("ContentChecksum:  %s\n", contents ?: "<invalid commit>"); /* fallback literal stays out of the g_autofree variable, so it is never g_free()d */
   g_print ("Date:  %s\n", str);
 
   if ((version = ot_admin_checksum_version (variant)))
index 530efd0a137d5de92898847aa9e0f63c9fad0f05..3376ac58cbbf4f09e8ac54f13194d6abbefddb6e 100644 (file)
@@ -21,7 +21,7 @@
 
 set -euo pipefail
 
-echo "1..$((81 + ${extra_basic_tests:-0}))"
+echo "1..$((82 + ${extra_basic_tests:-0}))"
 
 CHECKOUT_U_ARG=""
 CHECKOUT_H_ARGS="-H"
@@ -755,6 +755,16 @@ assert_file_has_content show-output "Third commit"
 assert_file_has_content show-output "commit $checksum"
 echo "ok show full output"
 
+grep -E -e '^ContentChecksum' show-output > previous-content-checksum.txt
+cd $test_tmpdir/checkout-test2
+checksum=$($OSTREE commit ${COMMIT_ARGS} -b test4 -s "Another commit with different subject")
+cd ${test_tmpdir}
+$OSTREE show test4 | grep -E -e '^ContentChecksum' > new-content-checksum.txt
+if ! diff -u previous-content-checksum.txt new-content-checksum.txt; then
+    fatal "content checksum differs"
+fi
+echo "ok content checksum"
+
 cd $test_tmpdir/checkout-test2
 checksum1=$($OSTREE commit ${COMMIT_ARGS} -b test5 -s "First commit")
 checksum2=$($OSTREE commit ${COMMIT_ARGS} -b test5 -s "Second commit")